Update of explore-JULES-ES-1p0.Rmd that uses the new packages and functions
We build and test gaussian process emulators, perform a sensitivity analysis, and constrain the input space of JULES.
Andy thinks there might be mileage in analysing the atmospheric growth, which is here. /home/h01/hadaw/Research/ES_PPE/Oct20
At the moment, this vignette is hampered by the fact that emulators are failing on a few of the outputs which represent change over the historical period. The emulator is fine for predicting absolute values in the modern period.
Andy would like to see timeseries of: cVeg, cSoil and nbp, npp in GtC and GtC/yr.
Load libraries, functions and data.
# Load helper functions
# Chunk options. NOTE: the knitr option is 'warning' (singular) - the original
# 'warnings = FALSE' was silently ignored, so warnings still appeared in the
# rendered document.
knitr::opts_chunk$set(fig.path = "figs/", echo = TRUE, message = FALSE, warning = FALSE)
# load some helper functions
source('~/brazilCSSP/code/brazil_cssp/per_pft.R') # eventually, move the relevant functions
source('explore-JULES-ES-1p0_PPE_functions.R')
# Load packages
library(RColorBrewer)
library(fields)
library(MASS)
library(DiceKriging)
library(DiceEval)
library(ncdf4)
library(ncdf4.helpers)
library(foreach)
library(emtools)
library(imptools)
library(viztools)
library(julesR)
# Some palette options
yg <- brewer.pal(9, "YlGn")
ryb <- brewer.pal(11, "RdYlBu")
byr <- rev(ryb)
rb <- brewer.pal(11, "RdBu")
br <- rev(rb)
blues <- brewer.pal(9, 'Blues')
# Colour-blind-friendly palette
cbPal <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
# Seconds in a (365-day) year, for converting per-second fluxes to annual totals
ysec <- 60 * 60 * 24 * 365
# Years spanned by the annual global timeseries (164 values)
years <- 1850:2013
# Location of the ensemble output on disk
ensloc <- '/project/carbon_ppe/JULES-ES-1p0_PPE/'
ensmember <- 'P0000/'
subdir <- 'stats/'
floc <- paste0(ensloc, ensmember, subdir)
fn <- 'JULES-ES-1p0_P0000_Annual_global.nc'
# Open a single member as a test file
nc <- nc_open(paste0(floc, fn))
# What variables are in the file? Print each name with its long_name
# attribute. ncatt_get() returns list(hasatt, value); use $value rather
# than the fragile positional [[2]].
varlist <- nc.get.variable.list(nc)
for (var in varlist) {
  print(var)
  print(ncatt_get(nc, var, "long_name")$value)
}
## [1] "nbp_lnd_sum"
## [1] "Carbon Mass Flux out of Atmosphere Due to Net Biospheric Production on Land"
## [1] "year"
## [1] "year"
## [1] "nbp_lnd_mean"
## [1] "Carbon Mass Flux out of Atmosphere Due to Net Biospheric Production on Land"
## [1] "fLuc_lnd_sum"
## [1] "Net Carbon Mass Flux into Atmosphere Due to Land-Use Change"
## [1] "fLuc_lnd_mean"
## [1] "Net Carbon Mass Flux into Atmosphere Due to Land-Use Change"
## [1] "npp_nlim_lnd_sum"
## [1] "Net Primary Production on Land as Carbon Mass Flux"
## [1] "npp_nlim_lnd_mean"
## [1] "Net Primary Production on Land as Carbon Mass Flux"
## [1] "cSoil_lnd_sum"
## [1] "carbon mass in model soil pool"
## [1] "cSoil_lnd_mean"
## [1] "carbon mass in model soil pool"
## [1] "cVeg_lnd_sum"
## [1] "carbon mass in vegetation"
## [1] "cVeg_lnd_mean"
## [1] "carbon mass in vegetation"
## [1] "landCoverFrac_lnd_sum"
## [1] "Plant Functional Type Grid Fraction"
## [1] "landCoverFrac_lnd_mean"
## [1] "Plant Functional Type Grid Fraction"
## [1] "fHarvest_lnd_sum"
## [1] "Carbon Mass Flux into Atmosphere Due to Crop Harvesting"
## [1] "fHarvest_lnd_mean"
## [1] "Carbon Mass Flux into Atmosphere Due to Crop Harvesting"
## [1] "lai_lnd_sum"
## [1] "leaf area index"
## [1] "lai_lnd_mean"
## [1] "leaf area index"
## [1] "rh_lnd_sum"
## [1] "Total Heterotrophic Respiration on Land as Carbon Mass Flux"
## [1] "rh_lnd_mean"
## [1] "Total Heterotrophic Respiration on Land as Carbon Mass Flux"
## [1] "treeFrac_lnd_sum"
## [1] "Fractional cover of each surface type"
## [1] "treeFrac_lnd_mean"
## [1] "Fractional cover of each surface type"
## [1] "c3PftFrac_lnd_sum"
## [1] "Percentage Cover by C3 Plant Functional Type"
## [1] "c3PftFrac_lnd_mean"
## [1] "Percentage Cover by C3 Plant Functional Type"
## [1] "c4PftFrac_lnd_sum"
## [1] "Percentage Cover by C3 Plant Functional Type"
## [1] "c4PftFrac_lnd_mean"
## [1] "Percentage Cover by C3 Plant Functional Type"
## [1] "shrubFrac_lnd_sum"
## [1] "Percentage Cover by Shrub"
## [1] "shrubFrac_lnd_mean"
## [1] "Percentage Cover by Shrub"
## [1] "baresoilFrac_lnd_sum"
## [1] "Bare Soil Percentage Area Coverage"
## [1] "baresoilFrac_lnd_mean"
## [1] "Bare Soil Percentage Area Coverage"
## [1] "residualFrac_lnd_sum"
## [1] "Percentage of Grid Cell That Is Land but neither Vegetation Covered nor Bare Soil"
## [1] "residualFrac_lnd_mean"
## [1] "Percentage of Grid Cell That Is Land but neither Vegetation Covered nor Bare Soil"
extractTimeseries <- function(nc, variable){
  # Pull the full annual timeseries for a named variable from an open
  # ncdf4 connection and return it unchanged.
  ncvar_get(nc, variable)
}
makeTimeseriesEnsemble <- function(variable, nens = 499, nts = 164, cn = 1850:2013){
  # Build an (nens x nts) matrix of annual global timeseries for one
  # variable, one row per ensemble member. Members whose file is missing
  # or unreadable are left as all-NA rows.
  # variable : NetCDF variable name to extract
  # nens     : number of ensemble members
  # nts      : length of each timeseries
  # cn       : column names (years)
  datmat <- matrix(NA, nrow = nens, ncol = nts)
  colnames(datmat) <- cn
  enslist <- paste0("P", formatC(0:(nens - 1), width = 4, flag = "0"))
  for(i in seq_len(nens)){
    # Reset to NA each iteration: the original initialised 'vec' but then
    # assigned 'dat', so a failed open/read silently re-used the previous
    # member's data.
    dat <- rep(NA, nts)
    ensmember <- enslist[i]
    fn <- paste0(ensloc, ensmember, '/stats/', 'JULES-ES-1p0_', ensmember, '_Annual_global.nc')
    nc <- NULL
    try(nc <- nc_open(fn))
    if (!is.null(nc)) {
      try(dat <- extractTimeseries(nc, variable))
      # Only close handles we actually opened; the original closed
      # unconditionally, erroring on a stale handle after a failed open.
      nc_close(nc)
    }
    datmat[i, ] <- dat
  }
  datmat
}
# Timeseries plots of the raw ensemble: NBP, NPP, soil carbon and vegetation
# carbon, one line per ensemble member.
par(mfrow= c(1,4), las = 1)
# Semi-transparent black so overplotted members indicate density
# (makeTransparent is a project helper sourced above)
plotcol <- c(makeTransparent('black', 70))
# Fixed y-limits for NBP - presumably to clip extreme members; confirm
ylim = c(-10, 10)
matplot(years, t(nbp_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
ylab = 'GtC/year', main = 'NBP',
bty = 'n')
# y-limits from the ensemble spread in the first and last (164th) years
ylim = range(c(npp_ens[,1], npp_ens[,164]))
matplot(years, t(npp_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
ylab = 'GtC/year', main = 'NPP',
bty = 'n')
ylim = range(c(cSoil_ens[,1], cSoil_ens[,164]))
matplot(years, t(cSoil_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
ylab = 'GtC', main = 'Soil Carbon',
bty = 'n')
ylim = range(c(cVeg_ens[,1], cVeg_ens[,164]))
matplot(years, t(cVeg_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
ylab = 'GtC', main = 'Vegetation Carbon',
bty = 'n')
# Timeseries plots of vegetation state: LAI and land-cover fractions,
# one line per ensemble member (plotcol defined with the earlier plots).
par(mfrow= c(1,4), las = 1)
ylim = range(lai_lnd_mean_ens)
matplot(years, t(lai_lnd_mean_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
ylab = 'LAI', main = 'Leaf area index',
bty = 'n')
ylim = range(treeFrac_lnd_mean_ens)
matplot(years, t(treeFrac_lnd_mean_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
ylab = 'fraction', main = 'Tree fraction',
bty = 'n')
ylim = range(shrubFrac_lnd_mean_ens)
matplot(years, t(shrubFrac_lnd_mean_ens ), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
ylab = 'fraction', main = 'Shrub fraction',
bty = 'n')
ylim = range(baresoilFrac_lnd_mean_ens)
matplot(years, t(baresoilFrac_lnd_mean_ens ), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
ylab = 'fraction', main = 'bare soil fraction',
bty = 'n')
# Timeseries plots of heterotrophic respiration and the anthropogenic
# carbon fluxes (land-use change and harvest), one line per member.
par(mfrow= c(1,3), las = 1)
ylim = range(rh_lnd_sum_ens)
# rh_lnd_sum is a carbon mass flux (see the variable's long_name), so label
# the y-axis GtC/year to match the other flux panels; the original
# 'fraction' label was a copy-paste slip from the cover-fraction plots.
matplot(years, t(rh_lnd_sum_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
ylab = 'GtC/year', main = 'Heterotrophic Respiration',
bty = 'n')
ylim = range(fLuc_lnd_sum_ens)
matplot(years, t(fLuc_lnd_sum_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
ylab = 'GtC/year', main = 'Land use change emissions',
bty = 'n')
ylim = range(fHarvest_lnd_sum_ens)
matplot(years, t(fHarvest_lnd_sum_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
ylab = 'GtC/year', main = 'Harvest C flux to atmosphere',
bty = 'n')
# Anomalize each carbon-cycle timeseries relative to the first 20 years of
# the run (anomalizeTSmatrix is a project helper sourced above).
npp_ens_anom <- anomalizeTSmatrix(npp_ens, 1:20)
nbp_ens_anom <- anomalizeTSmatrix(nbp_ens, 1:20)
cSoil_ens_anom <- anomalizeTSmatrix(cSoil_ens, 1:20)
cVeg_ens_anom <- anomalizeTSmatrix(cVeg_ens, 1:20)
# Plot the anomaly timeseries, one line per ensemble member
par(mfrow= c(1,4), las = 1)
# Fixed y-limits for NBP, as in the raw-value plots above
ylim = range(-10, 10)
matplot(years, t(nbp_ens_anom), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
ylab = 'NBP sum Anomaly', main = 'NBP Anomaly',
bty = 'n')
# y-limits from the ensemble spread in the first and last years
ylim = range(c(npp_ens_anom[,1], npp_ens_anom[,164]))
matplot(years, t(npp_ens_anom), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
ylab = 'NPP sum Anomaly', main = 'NPP Anomaly',
bty = 'n')
ylim = range(c(cSoil_ens_anom[,1], cSoil_ens_anom[,164]))
matplot(years, t(cSoil_ens_anom), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
ylab = 'Soil carbon sum anomaly', main = 'Soil Carbon Anomaly',
bty = 'n')
ylim = range(c(cVeg_ens_anom[,1], cVeg_ens_anom[,164]))
matplot(years, t(cVeg_ens_anom), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
ylab = 'Veg carbon sum anomaly', main = 'Vegetation Carbon Anomaly',
bty = 'n')
# for each of the variables in the file, average the last 20 years as the "modern" value,
# and then place in a matrix
modernValue <- function(nc, variable, ix){
  # Read a variable from an open ncdf4 connection and return the mean of
  # its timeseries over the index locations ix.
  mean(ncvar_get(nc, variable)[ix])
}
# Indices 144:164 correspond to years 1993:2013 - NOTE(review): that is 21
# values, not the "last 20 years" described elsewhere; confirm the window
modernValue(nc = nc, variable = "npp_nlim_lnd_mean", ix = 144:164)
# apply to the test file to check it works
vec <- sapply(varlist, FUN = modernValue, nc = nc, ix = 144:164)
# Generate ensemble numbers, mean of the last 20 years of the timeseries (1994-2013).
# Cached in ensemble.rdata; delete that file to force regeneration.
if (file.exists("ensemble.rdata")) {
  load("ensemble.rdata")
} else {
  nens <- 499
  datmat <- matrix(NA, nrow = nens, ncol = length(varlist))
  colnames(datmat) <- varlist
  enslist <- paste0("P", formatC(0:(nens - 1), width = 4, flag = "0"))
  floc <- paste0(ensloc, ensmember, subdir)  # kept: saved with the data below
  for (i in seq_len(nens)) {
    # NA fallback so members with missing/corrupt files yield NA rows
    vec <- rep(NA, length(varlist))
    ensmember <- enslist[i]
    fn <- paste0(ensloc, ensmember, '/stats/', 'JULES-ES-1p0_', ensmember, '_Annual_global.nc')
    nc <- NULL
    try(nc <- nc_open(fn))
    if (!is.null(nc)) {
      try(vec <- sapply(varlist, FUN = modernValue, nc = nc, ix = 144:164))
      # Close only on a successful open; the original closed unconditionally,
      # which errors (or hits a stale handle) after a failed nc_open().
      nc_close(nc)
    }
    datmat[i, ] <- vec
  }
  save(nens, datmat, enslist, floc, file = "ensemble.rdata")
}
For each ensemble member and each variable, calculate the change from the 20 years at the start of the run, to the twenty years at the end of the run.
tsAnomaly <- function(nc, variable, startix = 1:20, endix = 144:164){
  # Change over the run for one variable: mean over the end-of-run window
  # (endix) minus mean over the start-of-run window (startix).
  dat <- ncvar_get(nc, variable)
  mean(dat[endix]) - mean(dat[startix])
}
# Apply to the test file to check it works (defaults: 1:20 vs 144:164)
tsAnomaly(nc = nc, variable = "npp_nlim_lnd_mean")
## [1] 2.410932e-09
# Generate the ensemble of start-to-end anomalies for every variable.
# Cached in anomaly_ensemble.rdata; delete that file to force regeneration.
if (file.exists("anomaly_ensemble.rdata")) {
  load("anomaly_ensemble.rdata")
} else {
  nens <- 499
  datmatAnom <- matrix(NA, nrow = nens, ncol = length(varlist))
  colnames(datmatAnom) <- varlist
  enslist <- paste0("P", formatC(0:(nens - 1), width = 4, flag = "0"))
  floc <- paste0(ensloc, ensmember, subdir)  # kept: saved with the data below
  for (i in seq_len(nens)) {
    # NA fallback so members with missing/corrupt files yield NA rows
    vec <- rep(NA, length(varlist))
    ensmember <- enslist[i]
    fn <- paste0(ensloc, ensmember, '/stats/', 'JULES-ES-1p0_', ensmember, '_Annual_global.nc')
    nc <- NULL
    try(nc <- nc_open(fn))
    if (!is.null(nc)) {
      try(vec <- sapply(varlist, FUN = tsAnomaly, nc = nc))
      # Close only on a successful open; the original closed unconditionally,
      # which errors (or hits a stale handle) after a failed nc_open().
      nc_close(nc)
    }
    datmatAnom[i, ] <- vec
  }
  save(nens, datmatAnom, enslist, floc, file = "anomaly_ensemble.rdata")
}
Initial clean of data set, removing variables that don’t change, and removing NAs (models that didn’t run).
# Indices of ensemble members that failed to run: the 'year' column is NA
# exactly when the file could not be read in the build loops above.
Y_nlevel0_ix <- which(is.na(datmat[,'year']))
YAnom_nlevel0_ix <- which(is.na(datmatAnom[,'year']))
# This should be true to proceed, or we'll have to start excluding the combined set.
identical(Y_nlevel0_ix, YAnom_nlevel0_ix)
## [1] TRUE
# Guard: the subsetting below assumes the SAME rows are excluded from both
# matrices, so fail loudly rather than continue with a mismatched set.
stopifnot(identical(Y_nlevel0_ix, YAnom_nlevel0_ix))
# Y is the whole data set
Y <- datmat
# Y_level0 is the 'cleaned' data set, truncated to variables that change, and removing NAs.
# Columns 2, 30 and 31 are dropped - presumably the constant ones ('year' is
# column 2; confirm which variables 30 and 31 are).
# NOTE(review): if NO members failed, -integer(0) selects zero rows and would
# empty these matrices; fine while failures exist, but worth guarding.
Y_level0 <- datmat[-Y_nlevel0_ix, -c(2,30,31)]
Y_nlevel0 <- datmat[Y_nlevel0_ix, -c(2, 30, 31)]
# YAnom is the whole anomaly data set
YAnom <- datmatAnom
# YAnom_level0 is the 'cleaned' anomaly data set, truncated to variables that change, and removing NAs
YAnom_level0 <- datmatAnom[-YAnom_nlevel0_ix, -c(2,30,31)]
YAnom_nlevel0 <- datmatAnom[YAnom_nlevel0_ix, -c(2,30,31)]
# load the original design and input space, normalize to [0-1]
# Load up the data (two waves of the latin hypercube design)
lhs_i = read.table('~/brazilCSSP/code/brazil_cssp/analyze_u-ao732/data/ES_PPE_i/lhs_u-ao732.txt', header = TRUE)
lhs_ii = read.table('~/brazilCSSP/code/brazil_cssp/analyze_u-ao732/data/ES_PPE_ii/lhs_u-ao732a.txt', header = TRUE)
# Keep the first 499 rows to match the ensemble size used above
toplevel_ix = 1:499
# The raw input data is a latin hypercube
lhs = rbind(lhs_i, lhs_ii)[toplevel_ix, ]
# Drop the design rows of failed members (indices computed earlier).
# NOTE(review): -Y_nlevel0_ix would empty this if no members failed; confirm
lhs_level0 <- lhs[-Y_nlevel0_ix,]
# Normalize the design to [0,1] (normalize is a project helper sourced above)
X = normalize(lhs)
colnames(X) = colnames(lhs)
# Split the normalized design into ran/failed subsets to match Y
X_level0 <- X[-Y_nlevel0_ix,]
X_nlevel0 <- X[Y_nlevel0_ix,]
# Number of input dimensions
d = ncol(X)
# lower and higher bound on the normalised matrix for visualisation
# NOTE(review): hard-coded 32 should presumably equal d; confirm and prefer d
rx = rbind(rep(0,32), rep(1,32))
"Probability of run failure" could be an interesting project — perhaps a logit transformation for the probability of run failure, or some other ML approach such as an SVM?
There are clear run failure thresholds in the parameters rootd_ft_io and lai_max, and quite strong visual indications that a_wl_io and bio_hum_cn matter.
# Simple way: pairs plot of the normalized inputs of the FAILED ensemble
# members, to eyeball which parameters are associated with run failure.
# Wide outer margins leave room for the legend drawn afterwards
par(oma = c(10,10,0,0))
#par(xpd = TRUE)
pairs(X_nlevel0,
xlim = c(0,1), ylim = c(0,1),
labels = 1:d,
col = makeTransparent('red', 150),
gap = 0,
pch = 20,
xaxt ='n', yaxt = 'n',
lower.panel = NULL)
# reset() is a project helper - presumably restores plotting coordinates so
# the legend can be placed in the outer margin; confirm
reset()
# Key mapping the numeric panel labels back to parameter names
legend('left', legend = paste(1:d, colnames(lhs)), cex = 1.2, bty = 'n')
# Stop knitting the document here
knitr::knit_exit()